In [1]:
# Imports and one-off configuration for the whole notebook.
import os
import numpy as np
import pandas as pd
import seaborn; seaborn.set_theme()  # apply seaborn's theme to figures
import plotly.io; plotly.io.templates.default = "seaborn"  # default plotly template
import emat
import yaml
from emat.util.show_dir import show_dir
from emat.analysis import display_experiments
import logging
from emat.util.loggers import log_to_stderr
# Request INFO-level log messages on stderr; they show up under the cells below.
log = log_to_stderr(logging.INFO)
import emat_vestate
import asyncio
# Database file used by this notebook, located under the current user's home directory.
database_path = os.path.expanduser("~/EMAT-VE/ve2-rspm-2022-02-23.db")
In [2]:
# Remember the directory the notebook was started from.  Running this cell
# logs "changing cwd to ...", so calling os.getcwd() again on a re-run would
# no longer resolve 'vestate-emat-files' against the original directory.
try:
    notebook_dir
except NameError:
    notebook_dir = os.getcwd()

# Only request database initialisation when the file does not exist yet.
initialize = not os.path.exists(database_path)
db = emat.SQLiteDB(database_path, initialize=initialize)

# Scope definition (uncertainties and measures), resolved relative to the
# launch directory rather than whatever the current directory happens to be.
scope_file = os.path.join(notebook_dir, 'vestate-emat-files', 'odot-otp-scope.yml')
model_scope = emat.Scope(scope_file)

# Model object bound to the database and the scope.
fx = emat_vestate.VEStateModel(db=db, scope=model_scope)
[00:29.12] MainProcess/INFO: running script emat_db_init.sql
[00:29.13] MainProcess/INFO: running script meta_model.sql
[00:29.14] MainProcess/INFO: found no experiments with missing run_id's
[00:29.14] MainProcess/INFO: running script emat_db_init_views.sql
[00:29.19] MainProcess/WARNING: changing cwd to E:\Projects\Clients\odot\ODOT_VEState\Github\sources\models\VE-State-EMAT\Temporary\tmpug0jszi7
In [3]:
# Print a summary of the scope: name, description, uncertainties and measures.
model_scope.info()
name: VEState
desc: VisionEval State Strategic Planning Model
uncertainties:
  LUDENSITYMIX = 0.0 to 0.3
  INTDENSITYSCEN = 0.8 to 1.0
  HHPOPGROWTHRATE = categorical
  INCOMEGROWTHRATE = 0.9 to 1.15
  LDVECODRVSCEN = 0.0 to 1.0
measures:
  HouseholdDvmt
  HouseholdCarSvcDvmt
  ComSvcDvmt
  PTVanDvmt
  LdvDvmt
  HvyTruckDvmt
  BusDvmt
  TotalDvmt
  TotalGGE
  AverageLdvMpg
  HouseholdCO2e
  MetroHhDvmt
  MetroAveHhDvmtPerHh
  MetroAveHhDvmtPerPrsn
  MetroAveHhDvmtPerVeh
  MetroHhGGE
  MetroHhCO2e
In [5]:
# Work with the 300-sample design stored in the database as 'exp_300'.
# When the design is not in the database yet (for example after the database
# file has just been created) it is generated first, so the notebook also runs
# from a clean start instead of relying on a cell that was executed earlier.
# (For quick trials a smaller design can be made the same way, e.g.
#  n_samples=30, design_name='exp_30'.)
if 'exp_300' not in fx.db.read_design_names(fx.scope.name):
    fx.design_experiments(n_samples=300, design_name='exp_300')
design1 = fx.db.read_experiment_all(fx.scope.name,design_name='exp_300')
design1
Out[5]:
LUDENSITYMIX INTDENSITYSCEN HHPOPGROWTHRATE INCOMEGROWTHRATE LDVECODRVSCEN
experiment
1 0.256121 0.990824 mid 0.919153 0.736541
2 0.189457 0.943774 high 1.127183 0.328131
3 0.044362 0.809831 low 0.951936 0.520699
4 0.178956 0.893921 mid 0.923353 0.355188
5 0.032498 0.860648 mid 0.953031 0.618596
... ... ... ... ... ...
296 0.228483 0.952907 mid 1.099318 0.834123
297 0.086440 0.898342 high 0.970182 0.965528
298 0.267638 0.886844 low 1.049866 0.590587
299 0.039768 0.938527 high 1.125158 0.330551
300 0.152111 0.866245 high 1.079549 0.589367

300 rows × 5 columns

In [ ]:
# Disabled: build the parameter dictionary for a single manual run from
# experiment 1 of the design.
# NOTE(review): the column list below leaves out LDVECODRVSCEN although the
# scope lists it as an uncertainty — confirm before re-enabling.
# params = design1.loc[1,['LUDENSITYMIX', 'INTDENSITYSCEN', 'HHPOPGROWTHRATE', 'INCOMEGROWTHRATE']].to_dict()
# # params = {key:params.get(key) for key in ['LUDENSITYMIX', 'INTDENSITYSCEN', 'HHPOPGROWTHRATE', 'INCOMEGROWTHRATE']}
# params
In [ ]:
# Disabled manual single-run step: hand the chosen parameters to the model.
# fx.setup(params)
In [ ]:
# Disabled manual single-run step: execute the model.
# fx.run()
In [ ]:
# Disabled manual single-run step: presumably reads the measures produced by the run — confirm.
# fx.load_measures()
In [6]:
# Hand the whole design to the asynchronous evaluator (at most 15 workers,
# one experiment per batch).  The returned handle is used by the following
# cells to check progress and to collect the results.
background = fx.async_experiments(design=design1, max_n_workers=15, batch_size=1)
[00:49.09] MainProcess/INFO: asynchronous_experiments(max_n_workers=15)
[00:49.10] MainProcess/INFO: AsyncExperimentalDesign.run start
[00:49.10] MainProcess/INFO: initializing default DistributedEvaluator.client
[00:49.10] MainProcess/INFO:   max_n_workers=15, actual n_workers=15
[00:49.10] MainProcess/INFO:   n_workers=15
C:\Users\aditya.gore\.conda\envs\emat\lib\site-packages\distributed\node.py:160: UserWarning:

Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 60489 instead

[00:50.96] MainProcess/INFO: completed initializing default DistributedEvaluator.client
[00:54.27] MainProcess/INFO: AsyncExperimentalDesign.run dispatching experiments
[00:54.36] MainProcess/INFO: performing 300 scenarios/policies * 1 model(s) = 300 experiments
[00:54.37] MainProcess/INFO: experiments in asynchronous evaluator
[00:54.38] MainProcess/INFO: AsyncExperimentalDesign.run dispatching task complete
In [15]:
background.progress() # Tally of runs by state (everything is pending right after dispatch; re-run to refresh)
Out[15]:
'300 runs: 130 done, 170 failed'
In [16]:
# Status per experiment: 'done', or the failure message for runs that failed.
background.status()
Out[16]:
experiment
1                                                   done
2      FAILED EXPERIMENT 2: Command '['Rscript.exe', ...
3      FAILED EXPERIMENT 3: Command '['Rscript.exe', ...
4                                                   done
5      FAILED EXPERIMENT 5: Command '['Rscript.exe', ...
                             ...                        
296    FAILED EXPERIMENT 296: Command '['Rscript.exe'...
297    FAILED EXPERIMENT 297: Command '['Rscript.exe'...
298    FAILED EXPERIMENT 298: Command '['Rscript.exe'...
299    FAILED EXPERIMENT 299: Command '['Rscript.exe'...
300    FAILED EXPERIMENT 300: Command '['Rscript.exe'...
Length: 300, dtype: object
In [17]:
# Wait for the background evaluation and collect inputs plus measures for
# every experiment.  Failed runs keep their inputs but have NaN measures.
results = await background.final_results()
results
C:\Users\aditya.gore\.conda\envs\emat\lib\site-packages\emat\model\asynchronous.py:157: UserWarning:

300 runs: 130 done, 170 failed

Out[17]:
LUDENSITYMIX INTDENSITYSCEN HHPOPGROWTHRATE INCOMEGROWTHRATE LDVECODRVSCEN HouseholdDvmt HouseholdCarSvcDvmt ComSvcDvmt PTVanDvmt LdvDvmt ... TotalDvmt TotalGGE AverageLdvMpg HouseholdCO2e MetroHhDvmt MetroAveHhDvmtPerHh MetroAveHhDvmtPerPrsn MetroAveHhDvmtPerVeh MetroHhGGE MetroHhCO2e
experiment
1 0.256121 0.990824 mid 0.919153 0.736541 5.485794e+07 452165.589148 5.923499e+06 87334.962793 6.086877e+07 ... 6.335914e+07 1.277835e+06 48.780445 1.313707e+10 1.852563e+07 24.510340 9.758614 12.317947 403111.340918 1.785584e+06
2 0.189457 0.943774 high 1.127183 0.328131 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 0.044362 0.809831 low 0.951936 0.520699 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 0.178956 0.893921 mid 0.923353 0.355188 5.694313e+07 477227.710243 5.923499e+06 87334.962793 6.295396e+07 ... 6.560023e+07 1.320991e+06 48.764963 1.364755e+10 1.930274e+07 25.538497 10.169451 12.767357 420617.804350 1.861412e+06
5 0.032498 0.860648 mid 0.953031 0.618596 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
296 0.228483 0.952907 mid 1.099318 0.834123 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
297 0.086440 0.898342 high 0.970182 0.965528 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
298 0.267638 0.886844 low 1.049866 0.590587 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
299 0.039768 0.938527 high 1.125158 0.330551 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
300 0.152111 0.866245 high 1.079549 0.589367 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

300 rows × 22 columns

In [18]:
from emat.util.loggers import timing_log
from emat.model import create_metamodel

# Fit the metamodel on the experiments that produced output only; rows whose
# HouseholdDvmt is missing are left out.
with timing_log("create metamodel"):
    completed_runs = results[results.HouseholdDvmt.notna()]
    mm = create_metamodel(fx.scope, completed_runs, suppress_converge_warnings=True)
[09:58:55.30] MainProcess/CRITICAL: <TIME BEGINS> create metamodel
[09:58:55.30] MainProcess/INFO: creating metamodel from data
[10:00:27.31] MainProcess/CRITICAL: < TIME ENDS > create metamodel <1m 32.01s>
In [19]:
# Cross-validation score of the metamodel for each measure, with timing.
with timing_log("crossvalidate metamodel"):
    cv_scores = mm.cross_val_scores()
    display(cv_scores)
[10:00:32.11] MainProcess/CRITICAL: <TIME BEGINS> crossvalidate metamodel
  Cross Validation Score
HouseholdDvmt 1.0000
HouseholdCarSvcDvmt 0.9996
ComSvcDvmt 1.0000
PTVanDvmt 1.0000
LdvDvmt 1.0000
HvyTruckDvmt 0.9998
BusDvmt 1.0000
TotalDvmt 1.0000
TotalGGE 1.0000
AverageLdvMpg 0.9851
HouseholdCO2e 1.0000
MetroHhDvmt 1.0000
MetroAveHhDvmtPerHh 1.0000
MetroAveHhDvmtPerPrsn 1.0000
MetroAveHhDvmtPerVeh 1.0000
MetroHhGGE 0.9999
MetroHhCO2e 0.9999
[10:01:44.13] MainProcess/CRITICAL: < TIME ENDS > crossvalidate metamodel <1m 12.02s>
In [20]:
# display_experiments is already imported in the first cell, so it is not
# imported a second time here.
# Show the experiment results for each measure in the scope.
display_experiments(fx.scope, results)

HouseholdDvmt

HouseholdCarSvcDvmt

ComSvcDvmt

PTVanDvmt

LdvDvmt

HvyTruckDvmt

BusDvmt

TotalDvmt

TotalGGE

AverageLdvMpg

HouseholdCO2e

MetroHhDvmt

MetroAveHhDvmtPerHh

MetroAveHhDvmtPerPrsn

MetroAveHhDvmtPerVeh

MetroHhGGE

MetroHhCO2e

In [25]:
from emat.workbench.analysis import cart

# Which inputs go together with a failed run?  A run counts as "of interest"
# here when its HouseholdDvmt measure is missing.
of_interest = results["HouseholdDvmt"].isna()
exp_300_params = fx.read_experiment_parameters(design_name='exp_300')
cart_alg = cart.CART(exp_300_params, of_interest)
cart_alg.build_tree()
In [26]:
from emat.util.xmle import Show

# Render the fitted tree as an SVG image.
tree_svg = cart_alg.show_tree(format='svg')
Show(tree_svg)
Out[26]:
Tree 0 INCOMEGROWTHRATE <= 1.041 gini = 0.491 samples = 300 value = [130, 170] 1 INCOMEGROWTHRATE <= 0.976 gini = 0.355 samples = 169 value = [130, 39] 0->1 True 8 gini = 0.0 samples = 131 value = [0, 131] 0->8 False 2 INCOMEGROWTHRATE <= 0.942 gini = 0.488 samples = 92 value = [53, 39] 1->2 7 gini = 0.0 samples = 77 value = [77, 0] 1->7 3 gini = 0.0 samples = 50 value = [50, 0] 2->3 4 LDVECODRVSCEN <= 0.552 gini = 0.133 samples = 42 value = [3, 39] 2->4 5 gini = 0.0 samples = 19 value = [0, 19] 4->5 6 gini = 0.227 samples = 23 value = [3, 20] 4->6
In [27]:
# Table of the boxes found by the tree: statistics per box plus the limits on each restricted input.
cart_alg.boxes_to_dataframe(include_stats=True)
Out[27]:
Box Statistics INCOMEGROWTHRATE LDVECODRVSCEN
coverage density gini entropy res dim mass min max min max
box 0 0.000000 0.000000 0.000000 0.000000 1 0.166667 NaN 0.941627 NaN NaN
box 1 0.111765 1.000000 0.000000 0.000000 2 0.063333 0.941627 0.976491 NaN 0.55213
box 2 0.117647 0.869565 0.226843 0.558629 2 0.076667 0.941627 0.976491 0.55213 NaN
box 3 0.000000 0.000000 0.000000 0.000000 1 0.256667 0.976491 1.04097 NaN NaN
box 4 0.770588 1.000000 0.000000 0.000000 1 0.436667 1.04097 NaN NaN NaN
In [33]:
# Boolean mask of the experiments that have a HouseholdDvmt value.
valid_exp = results["HouseholdDvmt"].notna()
In [34]:
# Among the experiments selected by valid_exp, flag those whose HouseholdCO2e
# is below 30E9 and fit a tree on their inputs.
valid_params = fx.read_experiment_parameters(design_name='exp_300')[valid_exp]
of_interest = results.loc[valid_exp, "HouseholdCO2e"] < 30E9
cart_alg = cart.CART(valid_params, of_interest)
cart_alg.build_tree()
In [35]:
# Render the tree fitted for the HouseholdCO2e threshold as SVG.
Show(cart_alg.show_tree(format='svg'))
Out[35]:
Tree 0 INCOMEGROWTHRATE <= 1.002 gini = 0.26 samples = 130 value = [20, 110] 1 gini = 0.0 samples = 83 value = [0, 83] 0->1 True 2 HHPOPGROWTHRATE!?!high <= 0.5 gini = 0.489 samples = 47 value = [20, 27] 0->2 False 3 INCOMEGROWTHRATE <= 1.029 gini = 0.264 samples = 32 value = [5, 27] 2->3 6 gini = 0.0 samples = 15 value = [15, 0] 2->6 4 gini = 0.0 samples = 22 value = [0, 22] 3->4 5 gini = 0.5 samples = 10 value = [5, 5] 3->5
In [36]:
# Table of the boxes for the HouseholdCO2e tree: statistics per box plus the limits on each restricted input.
cart_alg.boxes_to_dataframe(include_stats=True)
Out[36]:
Box Statistics HHPOPGROWTHRATE INCOMEGROWTHRATE
coverage density gini entropy res dim mass min max min max
box 0 0.509091 1.00000 0.000000 0.000000 2 0.430769 {mid, low} {mid, high, low} NaN 1.001809
box 1 0.200000 1.00000 0.000000 0.000000 2 0.169231 {mid, low} {mid, low} 1.001809 1.029172
box 2 0.045455 0.50000 0.500000 1.000000 2 0.076923 {mid, low} {mid, low} 1.029172 NaN
box 3 0.245455 0.84375 0.263672 0.625262 2 0.246154 {mid, low} {mid, high, low} 1.001809 NaN
In [ ]: